#include "opencl_source_map.hpp" 
namespace MNN { 
const char* buffer_to_image = 
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define GLOBAL_SIZE_2_DIMS __private const int global_size_dim0,__private const int global_size_dim1,\n"
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
"__constant sampler_t SAMPLER=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
"// convert kernel : from buffer(oi ) to image(oc,ic/4)\n"
"__kernel void conv2d1x1_opt_filter_buffer_to_image(GLOBAL_SIZE_2_DIMS __global const FLOAT *input_ptr,\n"
" __private const int input_channel,__private const int2 kernel_shape,__private const int ic_h_w_size,\n"
" __private const int height_width_size,__write_only image2d_t output) {\n"
" \n"
" int ic_4_idx=get_global_id(0); // ic/4\n"
" int oc_idx=get_global_id(1); // oc\n"
" DEAL_NON_UNIFORM_DIM2(ic_4_idx,oc_idx);\n"
" const int ic_idx=ic_4_idx*4;\n"
" const int buffer_offset=oc_idx*input_channel+ic_idx;\n"
" \n"
" FLOAT4 output_values=0;\n"
" if (ic_idx<input_channel) {\n"
" const int remain_channel=input_channel-ic_idx;\n"
" if (remain_channel >= 4) {\n"
" output_values.x=*(input_ptr+buffer_offset);\n"
" output_values.y=*(input_ptr+buffer_offset+1);\n"
" output_values.z=*(input_ptr+buffer_offset+2);\n"
" output_values.w=*(input_ptr+buffer_offset+3);\n"
" } else if (remain_channel == 3) {\n"
" output_values.x=*(input_ptr+buffer_offset);\n"
" output_values.y=*(input_ptr+buffer_offset+1);\n"
" output_values.z=*(input_ptr+buffer_offset+2);\n"
" output_values.w=0;\n"
" } else if (remain_channel == 2) {\n"
" output_values.x=*(input_ptr+buffer_offset);\n"
" output_values.y=*(input_ptr+buffer_offset+1);\n"
" output_values.z=0;\n"
" output_values.w=0;\n"
" } else if (remain_channel == 1) {\n"
" output_values.x=*(input_ptr+buffer_offset);\n"
" output_values.y=0;\n"
" output_values.z=0;\n"
" output_values.w=0;\n"
" }\n"
" }\n"
" WI_F(output,(int2)(ic_4_idx,oc_idx),output_values);\n"
"}\n"
"// convert kernel : from buffer(oihw) to image(oc/4 h w ,ic oc4)\n"
"__kernel void conv2d_filter_buffer_to_image(GLOBAL_SIZE_2_DIMS\n"
" #ifdef BUFFER_INP_FP32\n"
" __global const float *input_ptr,\n"
" #else\n"
" __global const FLOAT *input_ptr,\n"
" #endif\n"
" __private const int output_channel,__private const int2 kernel_shape,__private const int ic_h_w_size,\n"
" __private const int height_width_size,__write_only image2d_t output) {\n"
" int image_width_idx=get_global_id(0); // ic\n"
" int image_height_idx=get_global_id(1); // oc/4 h w\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int input_channel_4_idx=image_width_idx;\n"
" const int output_channel_4_idx=(image_height_idx/height_width_size)*4;\n"
" const int height_width_idx=image_height_idx % height_width_size;\n"
" const int buffer_height_idx=height_width_idx/kernel_shape.y;\n"
" const int buffer_width_idx=height_width_idx % kernel_shape.y;\n"
" const int buffer_offset=output_channel_4_idx*ic_h_w_size+input_channel_4_idx*height_width_size +\n"
" buffer_height_idx*kernel_shape.y+buffer_width_idx;\n"
" FLOAT4 output_values=0;\n"
" if (output_channel_4_idx<output_channel) {\n"
" const int remain_channel=output_channel-output_channel_4_idx;\n"
" if (remain_channel >= 4) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" offset += ic_h_w_size;\n"
" output_values.z=(FLOAT)(*(input_ptr+offset));\n"
" offset += ic_h_w_size;\n"
" output_values.w=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" offset += ic_h_w_size;\n"
" output_values.z=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" }\n"
" }\n"
" WI_F(output,(int2)(image_width_idx,image_height_idx),output_values);\n"
"}\n"
"// only for debug\n"
"// convert kernel : from image(oc/4 h w ,ic oc4) to buffer(oihw)\n"
"__kernel void conv2d_filter_image_to_buffer(GLOBAL_SIZE_2_DIMS __global FLOAT *output_ptr,\n"
" __private const int output_channel,__private const int2 kernel_shape,\n"
" __private const int ic_h_w_size,\n"
" __private const int height_width_size,__read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int input_channel_4_idx=image_width_idx;\n"
" const int output_channel_4_idx=image_height_idx/height_width_size*4;\n"
" const int height_width_idx=image_height_idx % height_width_size;\n"
" const int buffer_height_idx=height_width_idx/kernel_shape.y;\n"
" const int buffer_width_idx=height_width_idx % kernel_shape.y;\n"
" const int buffer_offset=output_channel_4_idx*ic_h_w_size+input_channel_4_idx*height_width_size +\n"
" buffer_height_idx*kernel_shape.y+buffer_width_idx;\n"
" if (output_channel_4_idx<output_channel) {\n"
" int2 coord=(int2)(image_width_idx,image_height_idx);\n"
" FLOAT4 values=RI_F(input_ptr,SAMPLER,coord);\n"
" const int remain_channel=(output_channel-output_channel_4_idx);\n"
" if (remain_channel >= 4) {\n"
" int offset=buffer_offset;\n"
" output_ptr[offset]=values.x;\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_ptr[offset]=values.y;\n"
" offset += ic_h_w_size;\n"
" output_ptr[offset]=values.z;\n"
" offset += ic_h_w_size;\n"
" output_ptr[offset]=values.w;\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output_ptr[offset]=values.x;\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_ptr[offset]=values.y;\n"
" offset += ic_h_w_size;\n"
" output_ptr[offset]=values.z;\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output_ptr[offset]=values.x;\n"
" offset=mad24(1,ic_h_w_size,offset);\n"
" output_ptr[offset]=values.y;\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output_ptr[offset]=values.x;\n"
" }\n"
" }\n"
"}\n"
"// convert kernel from buffer(mihw) to image(ic/4,ic4 h w m)\n"
"// but now dw only support m == 1\n"
"__kernel void dw_filter_buffer_to_image(GLOBAL_SIZE_2_DIMS\n"
" #ifdef BUFFER_INP_FP32\n"
" __global const float *input_ptr,\n"
" #else\n"
" __global const FLOAT *input_ptr,\n"
" #endif\n"
" __private const int4 kernel_shape,\n"
" __private const int height_width_size,__write_only image2d_t output) {\n"
" const int image_width_idx=get_global_id(0);\n"
" const int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" FLOAT4 output_values=0;\n"
" if (kernel_shape.x == 1) {\n"
" const int input_channel_4_idx=image_height_idx*4;\n"
" const int buffer_height_idx=image_width_idx/kernel_shape.w;\n"
" const int buffer_width_idx=image_width_idx % kernel_shape.w;\n"
" const int buffer_offset =\n"
" mad24(mad24(input_channel_4_idx,kernel_shape.z,buffer_height_idx),kernel_shape.w,buffer_width_idx);\n"
" const int remain_channel=kernel_shape.y-input_channel_4_idx;\n"
" if (input_channel_4_idx<kernel_shape.y) {\n"
" if (remain_channel >= 4) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.z=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.w=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.z=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" offset += height_width_size;\n"
" output_values.y=(FLOAT)(*(input_ptr+offset));\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output_values.x=(FLOAT)(*(input_ptr+offset));\n"
" }\n"
" }\n"
" }\n"
" WI_F(output,(int2)(image_width_idx,image_height_idx),output_values);\n"
"}\n"
"__kernel void nc4hw4_buffer_to_image(GLOBAL_SIZE_2_DIMS\n"
" __global const INPUT_TYPE *input_ptr,\n"
" __private const int2 output_shape,\n"
" __private const int batch_size,__write_only image2d_t output) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/output_shape.x;\n"
" const int height_idx=image_height_idx % output_shape.x;\n"
" const int width_idx=image_width_idx % output_shape.y;\n"
" const int channel_block_idx=image_width_idx/output_shape.y;\n"
" int buffer_offset =\n"
" (((batch_idx+channel_block_idx*batch_size)*output_shape.x+height_idx)*output_shape.y+width_idx)*4;\n"
" int2 coord=(int2)(image_width_idx,image_height_idx);\n"
" WI_DATA(output,coord,CONVERT_OUTPUT_I4(vload4(0,input_ptr+buffer_offset)));\n"
"}\n"
"__kernel void image_to_nc4hw4_buffer(GLOBAL_SIZE_2_DIMS\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int2 output_shape,\n"
" __private const int batch_size,\n"
" __read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/output_shape.x;\n"
" const int height_idx=image_height_idx % output_shape.x;\n"
" const int width_idx=image_width_idx % output_shape.y;\n"
" int channel_block_idx=image_width_idx/output_shape.y;\n"
" int buffer_offset =\n"
" (((batch_idx+channel_block_idx*batch_size)*output_shape.x+height_idx)*output_shape.y+width_idx)*4;\n"
" int2 coord=(int2)(image_width_idx,image_height_idx);\n"
" vstore4(CONVERT_OUTPUT4(RI_DATA(input_ptr,SAMPLER,coord)),0,output+buffer_offset);\n"
"}\n"
"__kernel void nhwc_buffer_to_image(GLOBAL_SIZE_2_DIMS\n"
" __global const INPUT_TYPE *input_ptr,\n"
" __private const int height,\n"
" __private const int width,__private const int channels,\n"
" __write_only image2d_t output) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/height;\n"
" const int height_idx=image_height_idx % height;\n"
" const int width_idx=image_width_idx % width;\n"
" const int channel_4_idx=(image_width_idx/width) << 2;\n"
" const int buffer_offset=((batch_idx*height+height_idx)*width+width_idx)*channels+channel_4_idx;\n"
" const int remain_channel=channels-channel_4_idx;\n"
" INPUT_TYPE4 values=vload4(0,input_ptr+buffer_offset);\n"
" if (remain_channel == 3) {\n"
" values.w=0;\n"
" } else if (remain_channel == 2) {\n"
" values.z=0;\n"
" values.w=0;\n"
" } else if (remain_channel == 1) {\n"
" values.y=0;\n"
" values.z=0;\n"
" values.w=0;\n"
" }\n"
" WI_DATA(output,(int2)(image_width_idx,image_height_idx),CONVERT_OUTPUT_I4(values));\n"
"}\n"
"__kernel void nchw_buffer_to_image(GLOBAL_SIZE_2_DIMS\n"
" __global const INPUT_TYPE *input_ptr,\n"
" __private const int height,__private const int width,__private const int channels,\n"
" __write_only image2d_t output) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" \n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/height;\n"
" const int height_idx=image_height_idx % height;\n"
" const int width_idx=image_width_idx % width;\n"
" const int channel_4_idx=image_width_idx/width << 2;\n"
" const int buffer_offset=((batch_idx*channels+channel_4_idx)*height+height_idx)*width+width_idx;\n"
" const int remain_channel=channels-channel_4_idx;\n"
" const int height_width_size=height*width;\n"
" INPUT_TYPE4 output_values=0;\n"
" if (remain_channel >= 4) {\n"
" int offset=buffer_offset;\n"
" output_values.x=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.y=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.z=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.w=*(input_ptr+offset);\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output_values.x=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.y=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.z=*(input_ptr+offset);\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output_values.x=*(input_ptr+offset);\n"
" offset += height_width_size;\n"
" output_values.y=*(input_ptr+offset);\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output_values.x=*(input_ptr+offset);\n"
" }\n"
" WI_DATA(output,(int2)(image_width_idx,image_height_idx),CONVERT_OUTPUT_I4(output_values));\n"
"}\n"
"__kernel void image_to_nhwc_buffer(GLOBAL_SIZE_2_DIMS\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int height,__private const int width,\n"
" __private const int channels,\n"
" __read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/height;\n"
" const int height_idx=image_height_idx % height;\n"
" const int width_idx=image_width_idx % width;\n"
" const int channel_4_idx=(image_width_idx/width) << 2;\n"
" const int buffer_offset=((batch_idx*height+height_idx)*width+width_idx)*channels+channel_4_idx;\n"
" int2 coord=(int2)(image_width_idx,image_height_idx);\n"
" \n"
" INPUT_TYPE_I4 values=RI_DATA(input_ptr,SAMPLER,coord);\n"
" const int remain_channel=channels-channel_4_idx;\n"
" if (remain_channel >= 4) {\n"
" vstore4(CONVERT_OUTPUT4(values),0,output+buffer_offset);\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.z;\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" }\n"
"}\n"
"__kernel void image_to_nchw_buffer(GLOBAL_SIZE_2_DIMS\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int height,__private const int width,\n"
" __private const int channels,\n"
" __read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" \n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int batch_idx=image_height_idx/height;\n"
" const int height_idx=image_height_idx % height;\n"
" const int width_idx=image_width_idx % width;\n"
" int channel_4_idx=(image_width_idx/width)*4;\n"
" int buffer_offset=((batch_idx*channels+channel_4_idx)*height+height_idx)*width+width_idx;\n"
" \n"
" INPUT_TYPE_I4 values=RI_DATA(input_ptr,SAMPLER,(int2)(image_width_idx,image_height_idx));\n"
" const int height_width_size=height*width;\n"
" const int remain_channel=channels-channel_4_idx;\n"
" if (remain_channel >= 4) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.z;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.w;\n"
" } else if (remain_channel == 3) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.z;\n"
" } else if (remain_channel == 2) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset += height_width_size;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" } else if (remain_channel == 1) {\n"
" int offset=buffer_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" }\n"
"}\n"
"// convert arg as 4 alignment\n"
"__kernel void arg_buffer_to_image(GLOBAL_SIZE_2_DIMS __global const INPUT_TYPE *input_ptr,__private const int count,\n"
" __write_only image2d_t output) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int buffer_4_offset=image_width_idx << 2;\n"
" const int remain=count-buffer_4_offset;\n"
" int offset=buffer_4_offset;\n"
" INPUT_TYPE4 values=0;\n"
" if (remain >= 4) {\n"
" values=vload4(0,input_ptr+offset);\n"
" } else if (remain == 3) {\n"
" values.x=*(input_ptr+offset);\n"
" offset++;\n"
" values.y=*(input_ptr+offset);\n"
" offset++;\n"
" values.z=*(input_ptr+offset);\n"
" } else if (remain == 2) {\n"
" values.x=*(input_ptr+offset);\n"
" offset++;\n"
" values.y=*(input_ptr+offset);\n"
" } else if (remain == 1) {\n"
" values.x=*(input_ptr+offset);\n"
" }\n"
" WI_DATA(output,(int2)(image_width_idx,image_height_idx),CONVERT_OUTPUT_I4(values));\n"
"}\n"
"// only for debug\n"
"__kernel void arg_image_to_buffer(GLOBAL_SIZE_2_DIMS __global OUTPUT_TYPE *output,__private const int count,\n"
" __read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" const int buffer_4_offset=image_width_idx << 2;\n"
" int2 coord=(int2)(image_width_idx,image_height_idx);\n"
" INPUT_TYPE_I4 values=RI_DATA(input_ptr,SAMPLER,coord);\n"
" const int remain=count-buffer_4_offset;\n"
" if (remain<4) {\n"
" switch (remain) {\n"
" case 3:\n"
" output[buffer_4_offset+2]=(OUTPUT_TYPE)values.s2;\n"
" case 2:\n"
" output[buffer_4_offset+1]=(OUTPUT_TYPE)values.s1;\n"
" case 1:\n"
" output[buffer_4_offset]=(OUTPUT_TYPE)values.s0;\n"
" }\n"
" } else {\n"
" vstore4(CONVERT_OUTPUT4(values),0,output+buffer_4_offset);\n"
" }\n"
" if (remain >= 4) {\n"
" vstore4(CONVERT_OUTPUT4(values),0,output+buffer_4_offset);\n"
" } else if (remain == 3) {\n"
" int offset=buffer_4_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.z;\n"
" } else if (remain == 2) {\n"
" int offset=buffer_4_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" offset++;\n"
" output[offset]=(OUTPUT_TYPE)values.y;\n"
" } else if (remain == 1) {\n"
" int offset=buffer_4_offset;\n"
" output[offset]=(OUTPUT_TYPE)values.x;\n"
" }\n"
"}\n"
"__kernel void image_to_image(GLOBAL_SIZE_2_DIMS\n"
" __write_only image2d_t output_ptr,\n"
" __read_only image2d_t input_ptr) {\n"
" int image_width_idx=get_global_id(0);\n"
" int image_height_idx=get_global_id(1);\n"
" \n"
" DEAL_NON_UNIFORM_DIM2(image_width_idx,image_height_idx);\n"
" \n"
" INPUT_TYPE_I4 values=RI_DATA(input_ptr,SAMPLER,(int2)(image_width_idx,image_height_idx));\n"
" WI_DATA(output_ptr,(int2)(image_width_idx,image_height_idx),CONVERT_OUTPUT_I4(values));\n"
"}\n"
;
}
